
load(merged_panel_data_loc) # load in merged dt.panel
# Estimate class-specific exponential decline curves.
fit.arps = function(data) {
  # data should be a 2 columns: 1 is production, 2 is age in months
  setnames(data, c('q','WellAge'))
  library(minpack.lm)
  nlsLM(q ~ qi/((1+b*a*WellAge)^(1/b)),
        data=data,
        start=list(qi=max(data[,q],na.rm=TRUE), a=0.1, b=1),
        control=nls.control(warnOnly=TRUE))
}

try.fit.arps = function(data, r = -0.1) {
  tryCatch(fit.arps(data), warning= function(w) {fit.arps(data)},
           error = function(e) {print('Arps failed to fit. Use exogenous decline')})
}

elapsed_months <- function(end_date, start_date) {
  ed <- as.POSIXlt(end_date)
  sd <- as.POSIXlt(start_date)
  12 * (ed$year - sd$year) + (ed$mon - sd$mon)
}

project.arps = function(dt.pan, 
                        data.end.date=as.Date('2018-12-01'), 
                        projection.end.date=sim.end.date, plot.pred=FALSE, r.exog=0.03) {
  # dt.pan should be a single well slice of dt.panel, containing entity_id, prod_date, liq, gas, and WellAge.
  # Do not include any other variables, since it eats up memory.
  
  # It fits the Arps function, then projects forward starting from projection.start+1 to projection.end.date.
  
  projection.length = elapsed_months(projection.end.date, data.end.date)
  projection.start.date = data.end.date + months(1)
  
  # Check that we have production through the end date. If not, return zero.
  if ( !(data.end.date %in% dt.pan[, prod_date]) ) return(0)
  # If production is already zero at the end of the series, project 0 into the future.
  if ( dt.pan[prod_date==data.end.date, liq==0] & dt.pan[prod_date==data.end.date, gas==0]) {
    proj.out = data.table(prod_date = seq.Date(projection.start.date, projection.end.date, by='months'),
                          WellAge = starting.WellAge + 1:projection.length)
    proj.out[, liq:=0]
    proj.out[, gas:=0]
    return(proj.out)
  }
  
  arps.fit.liq = try.fit.arps(dt.pan[,.(liq, WellAge)])
  arps.fit.gas = try.fit.arps(dt.pan[,.(gas, WellAge)])
  
  starting.WellAge = dt.pan[prod_date==data.end.date, max(WellAge)]
  
  proj.out = data.table(prod_date = seq.Date(projection.start.date, projection.end.date, by='months'),
                        WellAge = starting.WellAge + 1:projection.length)
  
  # If the NLS solved, use the predicted value. Otherwise use an exogenous 10% monthly decline.
  if (class(arps.fit.liq)=='nls') {
    proj.out[, liq := predict(arps.fit.liq, newdata=proj.out[,.(WellAge)])]
  } else {
    proj.out[, liq := dt.pan[.N, liq]*exp(-r.exog*( WellAge - dt.pan[.N, WellAge] ))]
    message(arps.fit.liq, ' for liq on entity ', dt.pan[, unique(entity_id)])
  }
  if (class(arps.fit.gas)=='nls') {
    proj.out[, gas := predict(arps.fit.gas, newdata=proj.out[,.(WellAge)])]
  } else {
    proj.out[, gas := dt.pan[.N, gas]*exp(-r.exog*( WellAge - dt.pan[.N, WellAge] ))]
    message(arps.fit.gas, ' for gas on entity ', dt.pan[, unique(entity_id)])
  }
  
  # Projections that go to zero end up as "NaN" in the nls prediction, since the function is strictly positive for all parameter values
  proj.out[is.na(liq), liq:=0]
  proj.out[is.na(gas), gas:=0]
  
  # Check if projected production rises by more than the tolerance (+20% of last 3 year average).
  # If it does, bring it from it's final value to zero at an exponential rate.
  tol = 1.2
  lag.years = 3
  if ( proj.out[1:2, diff(liq)>=0] ) { # if projection doesn't decline at the start of the projection, use exogenous decline
    warning(paste0('Nondecreasing liq production for entity ',dt.pan[,unique(entity_id)],'. Using exogenous decline.'))
    
    proj.out[, liq := dt.pan[.N, liq]*exp(-r.exog*( WellAge - dt.pan[.N, WellAge] ))]
  } 
  if ( proj.out[1:2, diff(gas)>=0] ) { # if projection doesn't decline at the start of the projection, use exogenous decline
    warning(paste0('Nondecreasing gas production for entity ',dt.pan[,unique(entity_id)],'. Using exogenous decline.'))
    
    proj.out[, gas := dt.pan[.N, gas]*exp(-r.exog*( WellAge - dt.pan[.N, WellAge] ))]
  } 
  
  # Plot actuals vs. prediction.
  if (plot.pred) {
    par(mfrow=c(1,2))
    plot(liq ~ WellAge, data=dt.pan[, .(liq, gas, WellAge)], type='l', xlim=c(0, proj.out[,max(WellAge)]), lwd=2)
    grid(); abline(h=0)
    if (class(arps.fit.liq)=='nls') lines(predict(arps.fit.liq) ~ dt.pan[, WellAge], col='red')
    lines(liq ~ WellAge, data=proj.out[, .(liq, gas, WellAge)], col='blue')
    plot(gas ~ WellAge, data=dt.pan[, .(liq, gas, WellAge)], type='l', xlim=c(0, proj.out[,max(WellAge)]), lwd=2)
    grid(); abline(h=0)
    if (class(arps.fit.gas)=='nls') lines(predict(arps.fit.gas) ~ dt.pan[, WellAge], col='red')
    lines(gas ~ WellAge, data=proj.out[, .(liq, gas, WellAge)], col='blue')
  }
  
  return(proj.out[, .(prod_date, liq, gas)]) # drop well age. no longer relevant.
}
# Test function
setkey(dt.panel, entity_id)
dt.panel[, WellAge := elapsed_months(prod_date, first_prod_month)]
dt.temp = dt.panel[entity_id==60928][,.(entity_id, prod_date, liq, gas, WellAge)]

project.arps(dt.temp, plot.pred=TRUE)

proj.arp.by.id = function(id, dt.panel, data.end.date, projection.end.date, plot.pred=FALSE) {
  project.arps(dt.panel[J(id)], data.end.date=data.end.date, 
               projection.end.date=projection.end.date, plot.pred=plot.pred)
}

# Split files into two (oil wells and gas wells) to save on memory
# For existing wells, merge main sample with old gas wells so we're not undercounting gas production
load(oldgas_loc)
dt.panel.oldgas = dt.panel[, .(entity_id, prod_type, fed.land, offshore, prod_date, first_prod_month, liq, gas)]
rm(dt.panel); gc()

load(merged_panel_data_loc)
dt.panel = rbind(dt.panel[, .(entity_id, prod_type, fed.land, offshore, prod_date, first_prod_month, liq, gas)],
                 dt.panel.oldgas)
rm(dt.panel.oldgas); gc()
# Convert to character so the full interactions are included.
dt.panel[, fed.land := as.character(fed.land)]
dt.panel[, offshore := as.character(offshore)]
dt.panel[, WellAge := elapsed_months(prod_date, first_prod_month)]
dt.panel = dt.panel[WellAge>0]

setkey(dt.panel, entity_id)
dt.panel.use = dt.panel[prod_type=='Oil']
key(dt.panel.use)
save('dt.panel.use', file=paste0(working.data.dir%&%'/dt_panel_Oil_',today(),'.RData'))
working.oil.file.name = paste0(working.data.dir%&%'/dt_panel_Oil_',today(),'.RData')
rm(dt.panel.use)
dt.panel.use = dt.panel[prod_type=='Gas']
save('dt.panel.use', file=paste0(working.data.dir%&%'/dt_panel_Gas_',today(),'.RData'))
working.gas.file.name = paste0(working.data.dir%&%'/dt_panel_Gas_',today(),'.RData')
rm(dt.panel)
rm(dt.panel.use)

data.end.date=as.Date('2018-12-01')
projection.end.date=as.Date('2050-12-01')
st.time = Sys.time()
difftime(st.time, run.start)

for (w in c('Oil','Gas')) {
  for (o in c('1','0')) {
    for (f in c('0','1')) {
      # Load oil or gas data. Only 1 at a time.
      if (w=='Oil') load(working.oil.file.name)
      if (w=='Gas') load(working.gas.file.name)
      
      dt.panel.use = dt.panel.use[offshore==o]
      dt.panel.use = dt.panel.use[fed.land==f]
      
      # Drop wells not producing as of data.end.date
      unique.ids = dt.panel.use[prod_date %in% data.end.date][gas>0 | liq>0][, unique(entity_id)]
      setkey(dt.panel.use, entity_id)
      dt.panel.use = dt.panel.use[J(unique.ids)]
      key(dt.panel.use)
      setkey(dt.panel.use, entity_id)
      
      unique.ids = dt.panel.use[,unique(entity_id)] 
      
      length(unique.ids)
      
      library(parallel)
      numcores = detectCores()
      
      cl = makeCluster(numcores)
      
      # Load necessary packages on cluster
      clusterEvalQ(cl, {library(lubridate); library(data.table)}) # library(minpack.lm)
      # Export Arps functions (etc.) to cluster
      clusterExport(cl, varlist=c('project.arps','fit.arps','try.fit.arps','elapsed_months'))
      
      # Test function
      proj.arp.by.id(unique.ids[1], dt.panel=dt.panel.use, 
                     data.end.date=data.end.date, 
                     projection.end.date=projection.end.date)
      
      mc.out = parLapply(cl=cl, X=unique.ids, fun=proj.arp.by.id, 
                         dt.panel=dt.panel.use, 
                         data.end.date=data.end.date, 
                         projection.end.date=projection.end.date)
      names(mc.out) = unique.ids
      stopCluster(cl)
      # Aggregate output and save
      prod.tot = do.call(rbind, mc.out)[, .(liq=sum(liq)/(365.25/12)/1e6, gas=sum(gas)/(365.25/12)/1e6), by=prod_date]
      
      # in million barrels per day, or bcf/d
      file.nm = paste0(working.data.dir%&%'/well_level_projections_',w,'_',ifelse(f=='0','Nonfederal','Federal'),
                       '_',ifelse(o=='1', 'offshore', 'onshore'),'_',today(),'.RData')
      save('mc.out', file=file.nm)
      file.nm = paste0(working.data.dir%&%'/aggregated_existing_well_projections_',w,'_',ifelse(f=='0','Nonfederal','Federal'),
                       '_',ifelse(o=='1', 'offshore', 'onshore'),'_',today(),'.RData')
      save('prod.tot', file=file.nm)
      message(paste0(w,' ',ifelse(f=='0','Nonfederal','Federal'),
                     ' ',ifelse(o=='1', 'offshore', 'onshore'), ' done. ', Sys.time()))
      rm(list=c('mc.out','prod.tot','dt.panel.use')); gc()
    }
  }
}
ed.time = Sys.time()
difftime(ed.time, st.time)

gc()

counter=1
for (w in c('Oil','Gas')) {
  for (o in c('1','0')) {
    for (f in c('0','1')) {
      file.nm = paste0(working.data.dir%&%'/aggregated_existing_well_projections_',w,'_',ifelse(f=='0','Nonfederal','Federal'),
                       '_',ifelse(o=='1', 'offshore', 'onshore'),'_',today(),'.RData')
      load(file.nm)
      prod.tot[, prod_type:=w]
      prod.tot[, fed.land:=as.numeric(f)]
      prod.tot[, offshore:=as.numeric(o)]
      
      if (counter==1) prod.tot.store = copy(prod.tot)
      if (counter>1) prod.tot.store = rbind(prod.tot.store, prod.tot)
      rm(prod.tot)
      counter = counter+1
    }
  }
}
prod.tot = prod.tot.store
rm(prod.tot.store)

prod.tot.agg = prod.tot[, .(liq=sum(liq), gas=sum(gas)), by=prod_date]
prod.tot.agg[year(prod_date)==2050]

save(prod.tot, file=paste0(working.data.dir%&%'/projected_production_',today(),'.RData'))
projected_well_level_production_data_loc = paste0(working.data.dir%&%'/projected_production_',today(),'.RData')
prod.tot.agg[, WellAge:=elapsed_months(prod_date, min(prod_date))+1]

prod.tot.hist = copy(tot.prod.by.fed.off.type)
prod.tot.hist[, land.type:=NULL]
prod.tot.hist[, offshore:=ifelse(offshore=='Onshore',0,1)]
setcolorder(prod.tot.hist, names(prod.tot))
# Keep only the dates that are input into the simulation, and don't appear in the forecast
prod.tot.hist = prod.tot.hist[prod_date %in% price.path[,unique(date)]]
prod.tot.hist = prod.tot.hist[prod_date<=sim.start.date]

# Stack historical and projected production from existing wells
prod.tot.hist[, prod_date := as.Date(prod_date)]
prod.tot = rbind(prod.tot.hist, prod.tot)
prod.tot = prod.tot[order(prod_type, fed.land, offshore, prod_date)]

# Go to wide: liquids production
prod.tot.liq.wide = dcast(prod.tot, prod_date ~ prod_type + fed.land + offshore, value.var='liq', fun.aggregate = mean)
setcolorder(prod.tot.liq.wide, c('prod_date',c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                                 c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(prod.tot.liq.wide, c('date',well.class.string))
prod.tot.liq.wide[, liq_Total := apply(prod.tot.liq.wide[, -'date'], 1, sum)]

# Go to wide: gas production
prod.tot.gas.wide = dcast(prod.tot, prod_date ~ prod_type + fed.land + offshore, value.var='gas', fun.aggregate = mean)
setcolorder(prod.tot.gas.wide, c('prod_date',c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_0',
                                 c('Oil_0','Gas_0','Oil_1','Gas_1')%&%'_1'))
setnames(prod.tot.gas.wide, c('date',well.class.string))
prod.tot.gas.wide[, gas_Total := apply(prod.tot.gas.wide[, -'date'], 1, sum)]

# Plot projected production from existing wells
palette(alpha(col_lookup, 1)) # Order: gas fed, gas nonfed, oil fed, oil nonfed
dev.off()
ex.start.date = year(sim.start.date)+1

for (lg in c('liq','gas')) {
  if (lg=='liq') prod.plot = copy(prod.tot.liq.wide)
  if (lg=='gas') prod.plot = copy(prod.tot.gas.wide)
  
  pdf(output.dir%&%lg%&%'_production_from_existing_wells_'%&%today()%&%'.pdf', family='serif', width=11, height=8.5)
  plot(Gas_Federal_Onshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=1,
       xlab='', ylab=ifelse(lg=='liq','Oil Production (mb/d)','Gas Production (bcf/d)'),
       ylim=c(0,ifelse(lg=='liq',8,70)), xaxt='n', cex.lab=1.5, cex.axis=1.5)
  grid(nx=NA, ny=NULL)
  abline(v=seq.Date(as.Date('2025-01-01'), as.Date('2050-01-01'), by='5 years'), lty=3, col='lightgray')
  abline(v=as.Date('2019-01-01'), lty=3, col='lightgray')
  axis(side=1, at=as.Date('2019-01-01'), labels = 2019, cex.axis=1.5)
  axis(side=1, at=seq.Date(as.Date('2025-01-01'), as.Date('2050-01-01'), by='5 years'),
       labels = seq(2025, 2050, by=5), cex.axis=1.5)
  # lines(Gas_Federal_Onshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=1)
  lines(Gas_Nonfederal_Onshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=2)
  lines(Oil_Federal_Onshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=3)
  lines(Oil_Nonfederal_Onshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=4)
  lines(Gas_Federal_Offshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=1, lty=2)
  lines(Gas_Nonfederal_Offshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=2, lty=2)
  lines(Oil_Federal_Offshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=3, lty=2)
  lines(Oil_Nonfederal_Offshore ~ date, data=prod.plot[year(date)>=ex.start.date], type='l', lwd=3, col=4, lty=2)
  legend('top',legend=c('Oil Wells, Nonfederal', 'Oil Wells, Federal',
                        'Gas Wells, Nonfederal', 'Gas Wells, Federal'),
         col=my.cols, lwd=3, lty=1, cex=1.5, bg='white')
  legend('topright',legend=c('Onshore', 'Offshore'), col=c('black'),
         lty=1:2, lwd=3,  cex=1.5, bg='white')
  dev.off()
}
